import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
df4 = pd.read_csv("https://raw.githubusercontent.com/opencasestudies/ocs-bp-opioid-rural-urban/master/data/simpler_import/county_annual.csv")
df4
| Unnamed: 0 | BUYER_COUNTY | BUYER_STATE | year | count | DOSAGE_UNIT | countyfips | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | ABBEVILLE | SC | 2006 | 877 | 363620.0 | 45001.0 |
| 1 | 2 | ABBEVILLE | SC | 2007 | 908 | 402940.0 | 45001.0 |
| 2 | 3 | ABBEVILLE | SC | 2008 | 871 | 424590.0 | 45001.0 |
| 3 | 4 | ABBEVILLE | SC | 2009 | 930 | 467230.0 | 45001.0 |
| 4 | 5 | ABBEVILLE | SC | 2010 | 1197 | 539280.0 | 45001.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 27753 | 27754 | NaN | NV | 2007 | 447 | 200600.0 | NaN |
| 27754 | 27755 | NaN | NV | 2008 | 5 | 2200.0 | NaN |
| 27755 | 27756 | NaN | OH | 2006 | 23 | 5100.0 | NaN |
| 27756 | 27757 | NaN | PR | 2006 | 10 | 17800.0 | NaN |
| 27757 | 27758 | NaN | PR | 2007 | 2 | 1300.0 | NaN |
27758 rows × 7 columns
df4 = df4.assign(pills = pd.to_numeric((df4.DOSAGE_UNIT))/1000000)
df4_year = df4
df4_year = df4_year.groupby(['year']).pills.mean().reset_index().rename(columns = {'pills' : 'average_pills'})
df4_year
| year | average_pills | |
|---|---|---|
| 0 | 2006 | 2.645410 |
| 1 | 2007 | 2.992099 |
| 2 | 2008 | 3.252162 |
| 3 | 2009 | 3.526038 |
| 4 | 2010 | 3.783656 |
| 5 | 2011 | 4.035583 |
| 6 | 2012 | 3.993064 |
| 7 | 2013 | 3.861752 |
| 8 | 2014 | 3.768738 |
fig = px.scatter(df4_year, x="year", y="average_pills",
labels={
"year": "Year",
"average_pills": "Number of pills in millions",
},
title="Average Number of Opioid Pills Shipped to a U.S. County")
fig.show()